1 Loading libraries

library(biomaRt)
library(edgeR)
library(limma)
library(ggplot2)
library(ggrepel)
library(clusterProfiler)
library(org.Hs.eg.db)
library(plotly)
library(dplyr)
library(DT)
library(viper)
library(dorothea)
library(ggpubr)

2 Importing data

gene_count<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/read_counts.rds")
genemart<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/genemart.rds")
gse39612_tumor.vs.normal<-read.delim2(file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/gse39612_tumor_vs_normal_DEGs_analysis.csv", sep = ",", row.names = 1)

3 Performing DEGs analysis on pyrvinium treated MCC cell lines

3.1 Dividing samples to WaGa and MKL1 cells separately

df_MKL1<-gene_count[,1:15]
print(head(df_MKL1))
##                   Mcon1 Mcon2 Mcon3 MD24h1 MD24h2 MD24h3 MD6h1 MD6h2 MD6h3
## ENSG00000223972.5     5     4     7      3     15      9    19     3     8
## ENSG00000227232.5   795   734   718    566    872    715   903   937   621
## ENSG00000278267.1    12     8     3      9     15      4    12     2     3
## ENSG00000243485.5     2     5     4      1      2      0     4    10     2
## ENSG00000284332.1     0     0     0      0      0      0     0     0     0
## ENSG00000237613.2     0     0     0      0      0      0     0     2     0
##                   MP24h1 MP24h2 MP24h3 MP6h1 MP6h2 MP6h3
## ENSG00000223972.5      8      7      7     4     1     4
## ENSG00000227232.5    781    818    721   618   921   770
## ENSG00000278267.1      9      7      4    11     6     2
## ENSG00000243485.5      3      1      9     1     2     0
## ENSG00000284332.1      0      0      0     0     0     0
## ENSG00000237613.2      0      0      0     0     0     0
df_WaGa<-gene_count[, 16:30]
print(head(df_WaGa))
##                   Wcon1 Wcon2 Wcon3 WD24h1 WD24h2 WD24h3 WD6h1 WD6h2 WD6h3
## ENSG00000223972.5    30    27    29     29     25     30    32    21    32
## ENSG00000227232.5   749   928  1018   1362   1215    995  1006  1274  1426
## ENSG00000278267.1     0     4     3     14     17     11     4    17    10
## ENSG00000243485.5    21     7     1     17      8      6     7    10    14
## ENSG00000284332.1     0     0     0      0      0      0     0     0     0
## ENSG00000237613.2     0     0     0      0      0      0     0     0     0
##                   WP24h1 WP24h2 WP24h3 WP6h1 WP6h2 WP6h3
## ENSG00000223972.5     22     22     14    37    41    32
## ENSG00000227232.5   1319   1690   1529  1196  1227  1040
## ENSG00000278267.1      6      7     15     5     6     9
## ENSG00000243485.5     12      8      7    22     6     8
## ENSG00000284332.1      0      0      0     0     0     0
## ENSG00000237613.2      0      0      0     0     0     1

3.2 Performing normalization and DEGs analysis for MKL1 samples

group_MKL1<-factor(c(rep("Mcon",3), rep("MD24h", 3), rep("MD6h", 3), rep("MP24h",3), rep("MP6h",3)))
ensemblid_MKL1<-rownames(df_MKL1)
ensemblid_MKL1<-unlist(lapply(ensemblid_MKL1, function(x) strsplit(x, "[.]")[[1]][1]))
gene_MKL1<- getBM(filters= "ensembl_gene_id", attributes= c("ensembl_gene_id","hgnc_symbol","chromosome_name","band","description","start_position","end_position", "entrezgene_id"),
                   values=ensemblid_MKL1,mart=genemart)

y<- DGEList(counts=df_MKL1,group=group_MKL1)
keep <- rowSums(cpm(y)>1) >= length(group_MKL1)/5 #cpm: compute counts per million or reads per kilobase per million#
y <- y[keep, ,keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
mm <- model.matrix(~0 + group_MKL1)
temp <- voom(y,mm, plot = T) ## voom + TMM
edat_MKL1 = temp$E
edat_MKL1<-as.data.frame(edat_MKL1)
fit <- lmFit(temp, mm)
efit <- eBayes(fit)
plotSA(efit, main="Final model: Mean-variance trend")

edat_MKL1[,"ensembl_gene_id"]<-unlist(lapply(rownames(edat_MKL1), function(x) strsplit(x, "[.]")[[1]][1]))

edat_MKL1_annotated<-left_join(edat_MKL1, gene_MKL1, by = "ensembl_gene_id")
edat_MKL1_annotated<-edat_MKL1_annotated[edat_MKL1_annotated$hgnc_symbol != "",]
edat_MKL1_annotated<-edat_MKL1_annotated[!duplicated(edat_MKL1_annotated$hgnc_symbol),]
edat_MKL1_annotated<-na.omit(edat_MKL1_annotated)
rownames(edat_MKL1_annotated)<-edat_MKL1_annotated$hgnc_symbol
edat_MKL1_annotated<-edat_MKL1_annotated[,c(1:15)]
#saveRDS(edat_MKL1_annotated, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_MKL1_realigned_normalized_edat.RDS")

design <- model.matrix(~0 + group_MKL1)
colnames(design)<-gsub("group_MKL1", "", colnames(design))
contr.matrix <- makeContrasts(
  MP6hvsMD6h = MP6h-MD6h, 
  MP24hvsMD24h = MP24h-MD24h, 
  MPvsMD = (MP6h+MP24h)-(MD6h+MD24h),
  MDvsMcon = (MD6h+MD24h)-Mcon,
  MPvsMcon= (MP6h+MP24h)-Mcon,
  levels = colnames(design))
colnames(fit$coefficients)<-gsub("group_MKL1","",colnames(fit$coefficients))
vfit <- contrasts.fit(fit, contrasts=contr.matrix)
efit <- eBayes(vfit)

DEGs_df_all_comparisons_MKL1<-list()
for (i in 1:length(colnames(efit$coefficients))){
  coef_name<-colnames(efit$coefficients)[i]
  print(coef_name)
  DEGs_df<-topTable(efit, coef = coef_name, n = Inf)
  rownames(DEGs_df)<-unlist(lapply(rownames(DEGs_df), function(x) strsplit(x, "[.]")[[1]][1]))
  DEGs_df$ensembl_gene_id<-rownames(DEGs_df)
  DEGs_df$hgnc_symbol<-gene_MKL1[match(rownames(DEGs_df), gene_MKL1$ensembl_gene_id), "hgnc_symbol"]
  DEGs_df$entrezgene_id<-gene_MKL1[match(rownames(DEGs_df), gene_MKL1$ensembl_gene_id), "entrezgene_id"]
  DEGs_df_all_comparisons_MKL1[[i]]<-DEGs_df
} #loop to generate a df list for DEGs without any filter.
names(DEGs_df_all_comparisons_MKL1)<-colnames(efit$coefficients)
#saveRDS(DEGs_df_all_comparisons_MKL1, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_DEGs_all_realigned_MKL1.RDS")

3.3 Performing normalization and DEGs analysis for WaGa samples

group_Waga<-factor(c(rep("Wcon", 3), rep("WD24h", 3), rep("WD6h",3), rep("WP24h", 3), rep("WP6h",3)))
ensemblid_waga<-rownames(df_WaGa)
ensemblid_waga<-unlist(lapply(ensemblid_waga, function(x) strsplit(x, "[.]")[[1]][1]))
genes_waga <- getBM(filters= "ensembl_gene_id", attributes= c("ensembl_gene_id","hgnc_symbol","chromosome_name","band","description","start_position","end_position","entrezgene_id"),
              values=ensemblid_waga,mart= genemart) 

y<- DGEList(counts=df_WaGa,group=group_Waga)
keep <- rowSums(cpm(y)>1) >= length(group_Waga)/5 #cpm: compute counts per million or reads per kilobase per million#
y <- y[keep, ,keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
mm <- model.matrix(~0 + group_Waga)
temp <- voom(y,mm, plot = T) ## voom + TMM
edat_waga = temp$E
edat_waga<-as.data.frame(edat_waga)
fit <- lmFit(temp, mm)
efit <- eBayes(fit)
plotSA(efit, main="Final model: Mean-variance trend")

edat_waga[,"ensembl_gene_id"]<-unlist(lapply(rownames(edat_waga), function(x) strsplit(x, "[.]")[[1]][1]))
edat_waga_annotated<-left_join(edat_waga, genes_waga, by = "ensembl_gene_id")
edat_waga_annotated<-edat_waga_annotated[edat_waga_annotated$hgnc_symbol != "",]
edat_waga_annotated<-edat_waga_annotated[!duplicated(edat_waga_annotated$hgnc_symbol),]
edat_waga_annotated<-na.omit(edat_waga_annotated)
rownames(edat_waga_annotated)<-edat_waga_annotated$hgnc_symbol
edat_waga_annotated<-edat_waga_annotated[,c(1:15)]
#saveRDS(edat_waga_annotated, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_waga_realigned_normalized_edat.RDS")

design <- model.matrix(~0 + group_Waga)
colnames(design)<-gsub("group_Waga", "", colnames(design))
contr.matrix <- makeContrasts(
  WP6hvsWD6h = WP6h-WD6h, 
  WP24hvsWD24h = WP24h-WD24h, 
  WPvsWD = (WP6h+WP24h)-(WD6h+WD24h),
  WDvsWcon = (WD6h+WD24h)-Wcon,
  WPvsWcon= (WP6h+WP24h)-Wcon,
  levels = colnames(design))
colnames(fit$coefficients)<-gsub("group_Waga","",colnames(fit$coefficients))
vfit <- contrasts.fit(fit, contrasts=contr.matrix)
efit <- eBayes(vfit)

DEGs_df_all_comparisons_waga<-list()
for (i in 1:length(colnames(efit$coefficients))){
  coef_name<-colnames(efit$coefficients)[i]
  print(coef_name)
  DEGs_df<-topTable(efit, coef = coef_name, n = Inf)
  rownames(DEGs_df)<-unlist(lapply(rownames(DEGs_df), function(x) strsplit(x, "[.]")[[1]][1]))
  DEGs_df$ensembl_gene_id<-rownames(DEGs_df)
  DEGs_df$hgnc_symbol<-genes_waga[match(rownames(DEGs_df), genes_waga$ensembl_gene_id), "hgnc_symbol"]
  DEGs_df$entrezgene_id<-genes_waga[match(rownames(DEGs_df), genes_waga$ensembl_gene_id), "entrezgene_id"]
  DEGs_df_all_comparisons_waga[[i]]<-DEGs_df
} #loop to generate a df list for DEGs without any filter.
names(DEGs_df_all_comparisons_waga)<-colnames(efit$coefficients)
#saveRDS(DEGs_df_all_comparisons_waga, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_DEGs_all_realigned_waga.RDS")

3.4 Visualizing DEGs in volcano plots

DEGs_df_all_comparisons_waga<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_DEGs_all_realigned_waga.RDS")
DEGs_df_all_comparisons_MKL1<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_DEGs_all_realigned_MKL1.RDS")

generate_volcano_plot_for_Waga_cell<-function(samplename){
  volc<-data.frame(log2fc = DEGs_df_all_comparisons_waga[[samplename]][,"logFC"],
                   sig = -1*log(as.numeric(DEGs_df_all_comparisons_waga[[samplename]][,"adj.P.Val"]), base = 10),
                   genes = DEGs_df_all_comparisons_waga[[samplename]][, "hgnc_symbol"])
  volc$de <- "NS"
  volc$de[volc$sig>-1*log10(0.05)] <- "Significant"
  volc$de[abs(volc$log2fc) >= 1 & volc$sig >10]<-"label"
  volc$delabel <- volc$genes
  volc[which(volc$de != "label"), "delabel"]<- NA
  id<-samplename
  p<-ggplot(volc,aes(x=log2fc,y=sig,col=de))+
    theme_linedraw()+
    theme_light()+
    geom_point(col = ifelse(volc$de == "NS", "#bdbdbd", ifelse(volc$de =="label","#FF8000","#9ecae1")))+
    geom_text_repel(size = 2.5, colour = "black", aes(label = ifelse(delabel != "<NA>", delabel, "")), max.overlaps = 30) +
    geom_vline(xintercept=c(-1, 1), col="red",linetype=4)+
    geom_hline(yintercept=-1*log10(0.05),col="black", linetype=4)+
    ggtitle(as.character(id))+
    ylab("-log10(padj)") +
    xlab(paste0(id,"\n Log2(Fold Change)")) #+
  #xlim(-3,3)+
  #ylim(0,40)
  ggsave(p, width = 14, height = 10, file = paste0("/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/pyrvinium_treated_", samplename, "_DEGs_volcano_plot.pdf"))
  return(p)
}

generate_volcano_plot_for_Waga_cell("WP24hvsWD24h")

generate_volcano_plot_for_MKL1_cell<-function(samplename){
  volc<-data.frame(log2fc = DEGs_df_all_comparisons_MKL1[[samplename]][,"logFC"],
                   sig = -1*log(as.numeric(DEGs_df_all_comparisons_MKL1[[samplename]][,"adj.P.Val"]), base = 10),
                   genes = DEGs_df_all_comparisons_MKL1[[samplename]][, "hgnc_symbol"])
  volc$de <- "NS"
  volc$de[volc$sig>-1*log10(0.05)] <- "Significant"
  volc$de[abs(volc$log2fc) >= 1 & volc$sig >10]<-"label"
  volc$delabel <- volc$genes
  volc[which(volc$de != "label"), "delabel"]<- NA
  id<-samplename
  p<-ggplot(volc,aes(x=log2fc,y=sig,col=de))+
    theme_linedraw()+
    theme_light()+
    geom_point(col = ifelse(volc$de == "NS", "#bdbdbd", ifelse(volc$de =="label","#FF8000","#9ecae1")))+
    geom_text_repel(size = 2.5, colour = "black", aes(label = ifelse(delabel != "<NA>", delabel, "")), max.overlaps = 30) +
    geom_vline(xintercept=c(-1, 1), col="red",linetype=4)+
    geom_hline(yintercept=-1*log10(0.05),col="black", linetype=4)+
    ggtitle(as.character(id))+
    ylab("-log10(padj)") +
    xlab(paste0(id,"\n Log2(Fold Change)"))
  ggsave(p, width = 14, height = 10, file= paste0("/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/pyrvinium_treated_", samplename, "_DEGs_volcano_plot.pdf"))
  return(p)
}

generate_volcano_plot_for_MKL1_cell("MP24hvsMD24h")

3.5 Overlapping GSE39612 MCC tumor vs. normal skin significant DEGs with pyrvinium vs.dmso treated MCC cell lines significant DEGs and visualizing with volcano plot

WP24hvsWD24h<-DEGs_df_all_comparisons_waga$WP24hvsWD24h
WP24hvsWD24h_sig_upregulated_DEGs<-WP24hvsWD24h[WP24hvsWD24h$logFC>=1 & WP24hvsWD24h$adj.P.Val <=0.05,]
WP24hvsWD24h_sig_downregulated_DEGs<-WP24hvsWD24h[WP24hvsWD24h$logFC<=-1 & WP24hvsWD24h$adj.P.Val <=0.05,]

gse39612_tumor.vs.normal$logFC<-as.numeric(gse39612_tumor.vs.normal$logFC)
gse39612_tumor.vs.normal$adj.P.Val<-as.numeric(gse39612_tumor.vs.normal$adj.P.Val)
gse39612_sig_upregulated_DEGs<-gse39612_tumor.vs.normal[gse39612_tumor.vs.normal$logFC>=1,]
gse39612_sig_upregulated_DEGs<-gse39612_sig_upregulated_DEGs[gse39612_sig_upregulated_DEGs$adj.P.Val<=0.05,]
gse39612_sig_downregulated_DEGs<-gse39612_tumor.vs.normal[gse39612_tumor.vs.normal$logFC<=-1, ]
gse39612_sig_downregulated_DEGs<-gse39612_sig_downregulated_DEGs[gse39612_sig_downregulated_DEGs$adj.P.Val<=0.05,]

GSE39612_upregulated_DEGs_decreased_in_pyr_waga_DEGs<-intersect(rownames(gse39612_sig_upregulated_DEGs), WP24hvsWD24h_sig_downregulated_DEGs$hgnc_symbol)
GSE39612_downregulated_DEGs_increased_in_pyr_waga_DEGs<-intersect(rownames(gse39612_sig_downregulated_DEGs), WP24hvsWD24h_sig_upregulated_DEGs$hgnc_symbol)

volc<-data.frame(log2fc = DEGs_df_all_comparisons_waga[["WP24hvsWD24h"]][,"logFC"],
                 sig = -1*log10(DEGs_df_all_comparisons_waga[["WP24hvsWD24h"]][,"adj.P.Val"]),
                 genes = DEGs_df_all_comparisons_waga[["WP24hvsWD24h"]][, "hgnc_symbol"])
volc$de <- "NS"
volc$de[volc$sig>-1*log10(0.05)] <- "Significant"
annotate_down_df<-volc[volc$genes %in% GSE39612_upregulated_DEGs_decreased_in_pyr_waga_DEGs,]
annotate_up_df<-volc[volc$genes %in% GSE39612_downregulated_DEGs_increased_in_pyr_waga_DEGs,]

p<-ggplot(volc,aes(x=log2fc,y=sig))+
  theme_linedraw()+
  theme_light()+
  geom_point(col = "grey50", size = 3, alpha = 0.3)+
  #geom_text_repel(size = 2.5, colour = "black", aes(label = ifelse(delabel != "<NA>", delabel, ""))) +
  geom_point(data = annotate_down_df , # New layer containing data subset il_genes       
             size = 3,
             shape = 21,
             alpha = 0.8,
             fill = "firebrick",
             colour = "black")+
  geom_text_repel(data = annotate_down_df, aes(label = genes), size = 2.8, fontface = "bold", colour = "black", force = 1, max.overlaps = 20) +
  geom_point(data = annotate_up_df, # New layer containing data subset il_genes       
             size = 3,
             shape = 21,
             fill = "blue",
             alpha = 0.8,
             colour = "black")+
  geom_text_repel(data = annotate_up_df, aes(label = genes), size = 2.8, fontface = "bold", colour = "black", force = 1, box.padding = 0.5, max.overlaps = 20) +
  geom_vline(xintercept=c(-1, 1), col="red",linetype=4)+
  geom_hline(yintercept=-1*log10(0.05),col="black", linetype=4)+
  ylab("-log10(padj)") +
  xlab("WaGa pyrvinium treated vs. control \n Log2(Fold Change)")
p<-p+theme(axis.title=element_text(size=20,face="bold"),
           axis.text = element_text(size =15, face="bold"))
p

#ggsave(p, width = 18, height = 12, file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/pyrvinium_WaGa_24h_gse39612_overlapped_DEGs_volcano_plot.pdf")
MP24hvsMD24h<-DEGs_df_all_comparisons_MKL1$MP24hvsMD24h
MP24hvsMD24h_sig_upregulated_DEGs<-MP24hvsMD24h[MP24hvsMD24h$logFC>=1 & MP24hvsMD24h$adj.P.Val <=0.05,]
MP24hvsMD24h_sig_downregulated_DEGs<-MP24hvsMD24h[MP24hvsMD24h$logFC<=-1 & MP24hvsMD24h$adj.P.Val <=0.05,]

GSE39612_upregulated_DEGs_decreased_in_pyr_MKL1_DEGs<-intersect(rownames(gse39612_sig_upregulated_DEGs), MP24hvsMD24h_sig_downregulated_DEGs$hgnc_symbol)
GSE39612_downregulated_DEGs_increased_in_pyr_MKL1_DEGs<-intersect(rownames(gse39612_sig_downregulated_DEGs), MP24hvsMD24h_sig_upregulated_DEGs$hgnc_symbol)

volc<-data.frame(log2fc = DEGs_df_all_comparisons_MKL1[["MP24hvsMD24h"]][,"logFC"],
                 sig = -1*log10(DEGs_df_all_comparisons_MKL1[["MP24hvsMD24h"]][,"adj.P.Val"]),
                 genes = DEGs_df_all_comparisons_MKL1[["MP24hvsMD24h"]][, "hgnc_symbol"])
volc$de <- "NS"
volc$de[volc$sig>-1*log10(0.05)] <- "Significant"
annotate_down_df<-volc[volc$genes %in% GSE39612_upregulated_DEGs_decreased_in_pyr_MKL1_DEGs,]
annotate_up_df<-volc[volc$genes %in% GSE39612_downregulated_DEGs_increased_in_pyr_MKL1_DEGs,]

p<-ggplot(volc,aes(x=log2fc,y=sig))+
  theme_linedraw()+
  theme_light()+
  geom_point(col = "grey50", size = 3, alpha = 0.3)+
  #geom_text_repel(size = 2.5, colour = "black", aes(label = ifelse(delabel != "<NA>", delabel, ""))) +
  geom_point(data = annotate_down_df , # New layer containing data subset il_genes       
             size = 3,
             shape = 21,
             alpha = 0.8,
             fill = "firebrick",
             colour = "black")+
  geom_text_repel(data = annotate_down_df, aes(label = genes), size = 2.8, fontface = "bold", colour = "black", force = 1, max.overlaps = 20) +
  geom_point(data = annotate_up_df, # New layer containing data subset il_genes       
             size = 3,
             shape = 21,
             fill = "blue",
             alpha = 0.8,
             colour = "black")+
  geom_text_repel(data = annotate_up_df, aes(label = genes), size = 2.8, fontface = "bold", colour = "black", force = 1, box.padding = 0.5, max.overlaps = 20) +
  geom_vline(xintercept=c(-1, 1), col="red",linetype=4)+
  geom_hline(yintercept=-1*log10(0.05),col="black", linetype=4)+
  ylab("-log10(padj)") +
  xlab("MKL1 pyrvinium treated vs. control \n Log2(Fold Change)")
p<-p+theme(axis.title=element_text(size=20,face="bold"),
           axis.text = element_text(size =15, face="bold"))
p

#ggsave(p, width = 18, height = 12, file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/pyrvinium_MKL1_24h_gse39612_overlapped_DEGs_volcano_plot.pdf")

4 Performing GO-over representation analysis on DEGs from different cell lines and time of treatment

DEGs_df_all_comparisons_waga<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_DEGs_all_realigned_waga.RDS")
WaGa_6h_up<-DEGs_df_all_comparisons_waga$WP6hvsWD6h[DEGs_df_all_comparisons_waga$WP6hvsWD6h$adj.P.Val <= 0.05 & 
                                                      DEGs_df_all_comparisons_waga$WP6hvsWD6h$logFC >= 1,]
WaGa_6h_down<-DEGs_df_all_comparisons_waga$WP6hvsWD6h[DEGs_df_all_comparisons_waga$WP6hvsWD6h$adj.P.Val <= 0.05 & DEGs_df_all_comparisons_waga$WP6hvsWD6h$logFC <= -1,]
WaGa_6h_up[,"Cell_line"]<-rep("WaGa", nrow(WaGa_6h_up))
WaGa_6h_up[,"time"]<-rep("6h", nrow(WaGa_6h_up))
WaGa_6h_up[,"group"]<-rep("up", nrow(WaGa_6h_up))

WaGa_6h_down[,"Cell_line"]<-rep("WaGa", nrow(WaGa_6h_down))
WaGa_6h_down[,"time"]<-rep("6h", nrow(WaGa_6h_down))
WaGa_6h_down[,"group"]<-rep("down", nrow(WaGa_6h_down))

WaGa_24h_up<-DEGs_df_all_comparisons_waga$WP24hvsWD24h[DEGs_df_all_comparisons_waga$WP24hvsWD24h$adj.P.Val <= 0.05 & DEGs_df_all_comparisons_waga$WP24hvsWD24h$logFC >=1, ]
WaGa_24h_down<-DEGs_df_all_comparisons_waga$WP24hvsWD24h[DEGs_df_all_comparisons_waga$WP24hvsWD24h$adj.P.Val <= 0.05 & DEGs_df_all_comparisons_waga$WP24hvsWD24h$logFC <=-1, ]

WaGa_24h_up[,"Cell_line"]<-rep("WaGa", nrow(WaGa_24h_up))
WaGa_24h_up[,"time"]<-rep("24h", nrow(WaGa_24h_up))
WaGa_24h_up[,"group"]<-rep("up", nrow(WaGa_24h_up))

WaGa_24h_down[,"Cell_line"]<-rep("WaGa", nrow(WaGa_24h_down))
WaGa_24h_down[,"time"]<-rep("24h", nrow(WaGa_24h_down))
WaGa_24h_down[,"group"]<-rep("down", nrow(WaGa_24h_down))


DEGs_df_all_comparisons_MKL1<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_DEGs_all_realigned_MKL1.RDS")
MKL1_6h_up<-DEGs_df_all_comparisons_MKL1$MP6hvsMD6h[DEGs_df_all_comparisons_MKL1$MP6hvsMD6h$adj.P.Val<=0.05 & 
                                                      DEGs_df_all_comparisons_MKL1$MP6hvsMD6h$logFC >=1 , ]
MKL1_6h_down<-DEGs_df_all_comparisons_MKL1$MP6hvsMD6h[DEGs_df_all_comparisons_MKL1$MP6hvsMD6h$adj.P.Val<=0.05 & 
                                                        DEGs_df_all_comparisons_MKL1$MP6hvsMD6h$logFC <= -1 , ]
MKL1_6h_up[,"Cell_line"]<-rep("MKL1", nrow(MKL1_6h_up))
MKL1_6h_up[,"time"]<-rep("6h", nrow(MKL1_6h_up))
MKL1_6h_up[,"group"]<-rep("up", nrow(MKL1_6h_up))

MKL1_6h_down[,"Cell_line"]<-rep("MKL1", nrow(MKL1_6h_down))
MKL1_6h_down[,"time"]<-rep("6h", nrow(MKL1_6h_down))
MKL1_6h_down[,"group"]<-rep("down", nrow(MKL1_6h_down))

MKL1_24h_up<-DEGs_df_all_comparisons_MKL1$MP24hvsMD24h[DEGs_df_all_comparisons_MKL1$MP24hvsMD24h$adj.P.Val<=0.05 & DEGs_df_all_comparisons_MKL1$MP24hvsMD24h$logFC >=1 , ]
MKL1_24h_down<-DEGs_df_all_comparisons_MKL1$MP24hvsMD24h[DEGs_df_all_comparisons_MKL1$MP24hvsMD24h$adj.P.Val<=0.05 & DEGs_df_all_comparisons_MKL1$MP24hvsMD24h$logFC <= -1 , ]
MKL1_24h_up[,"Cell_line"]<-rep("MKL1", nrow(MKL1_24h_up))
MKL1_24h_up[,"time"]<-rep("24h", nrow(MKL1_24h_up))
MKL1_24h_up[,"group"]<-rep("up", nrow(MKL1_24h_up))

MKL1_24h_down[,"Cell_line"]<-rep("MKL1", nrow(MKL1_24h_down))
MKL1_24h_down[,"time"]<-rep("24h", nrow(MKL1_24h_down))
MKL1_24h_down[,"group"]<-rep("down", nrow(MKL1_24h_down))

pyr_treated_DEGs<-list(WaGa_6h_up, WaGa_24h_up, MKL1_6h_up, MKL1_24h_up, WaGa_6h_down, WaGa_24h_down, MKL1_6h_down, MKL1_24h_down)
pyr_treated_sig_DEGs<-Reduce(rbind, pyr_treated_DEGs)
pyr_treated_sig_DEGs<-na.omit(pyr_treated_sig_DEGs)
pyr_treated_sig_FC<-pyr_treated_sig_DEGs$logFC
names(pyr_treated_sig_FC)<-pyr_treated_sig_DEGs$entrezgene_id
pyr_treated_sig_DEGs<-pyr_treated_sig_DEGs[,c("entrezgene_id", "Cell_line", "time", "group", "logFC")]
print(head(pyr_treated_sig_DEGs))
##                 entrezgene_id Cell_line time group    logFC
## ENSG00000101255         57761      WaGa   6h    up 1.907056
## ENSG00000124762          1026      WaGa   6h    up 1.907833
## ENSG00000130766         83667      WaGa   6h    up 1.569798
## ENSG00000148400          4851      WaGa   6h    up 1.180810
## ENSG00000135269         26136      WaGa   6h    up 1.332035
## ENSG00000122861          5328      WaGa   6h    up 4.024807
pyr_go_result<-compareCluster(entrezgene_id~Cell_line+time,
                          data = pyr_treated_sig_DEGs,
                          fun = enrichGO,
                          OrgDb = "org.Hs.eg.db",
                          ont = "BP",
                          pAdjustMethod = "BH",
                          pvalueCutoff  = 0.05,
                          qvalueCutoff  = 0.25
                          )
#saveRDS(pyr_go_result, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/pyr_treated_go_terms_enrichment.rds")

4.1 Visualizing the GO Terms over-representative analysis

pyr_go_result<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/pyr_treated_go_terms_enrichment.rds")
GO_24H<-pyr_go_result@compareClusterResult[pyr_go_result@compareClusterResult$time == "24h", ]
datatable(data = pyr_go_result@compareClusterResult)
require(DOSE)
require(enrichplot)
require(viridis)

pyr_go_result@compareClusterResult$Cell_line = factor(pyr_go_result@compareClusterResult$Cell_line, levels = c("WaGa", "MKL1"))

p = dotplot(pyr_go_result, 
            x = "time", 
            font.size=22, 
            size = "Count",
            by = "p.adjust",
            title = "Top Gene Ontolgy terms of pyrvinium Treated MCC Cell Lines") + 
  facet_grid(~Cell_line) + 
  scale_color_viridis(option = "viridis", direction = -1) +
  scale_fill_viridis(option = "viridis", direction = -1) +
  scale_x_discrete(limits = c("6h", "24h"))+
  scale_size_area(max_size = 20)+
  theme(strip.text.x = element_text(size = 22, face = "bold"),
        axis.text.x = element_text(size = 22, face = "bold"),
        axis.title = element_text(size = 22, face = "bold"),
        text = element_text(size = 22, face = "bold"),
        plot.title = element_text(hjust = 0.5, size = 30))
p

5 Performing KEGG pathway analysis on pyrvinium treated MCC cell lines

5.1 KEGG pathway analysis of UP regulated genes by pyrvinium treatment (24 hours)

#For MKL1 24h ==================
pyr_treated_MKL1_24h_sig_DEGs_up<-pyr_treated_sig_DEGs[pyr_treated_sig_DEGs$group == "up" & pyr_treated_sig_DEGs$Cell_line == "MKL1" &   pyr_treated_sig_DEGs$time == "24h",]
pyr_treated_MKL1_24h_sig_FC_up<-pyr_treated_sig_DEGs[pyr_treated_sig_DEGs$group == "up" & pyr_treated_sig_DEGs$Cell_line == "MKL1" & 
                                                       pyr_treated_sig_DEGs$time == "24h","logFC"]
names(pyr_treated_MKL1_24h_sig_FC_up)<-pyr_treated_MKL1_24h_sig_DEGs_up$entrezgene_id

pyr_kegg_MKL1_24h_result_up<-enrichKEGG(pyr_treated_MKL1_24h_sig_DEGs_up$entrezgene_id,
                                pAdjustMethod = "BH",
                                pvalueCutoff  = 0.05,
                                qvalueCutoff  = 0.25
)

#print(head(pyr_kegg_MKL1_24h_result_up@result))

edox_up_MKL1_24h<-setReadable(pyr_kegg_MKL1_24h_result_up, "org.Hs.eg.db", "ENTREZID")

p_MKL1_24h_up<-enrichplot::cnetplot(edox_up_MKL1_24h,
             foldChange = pyr_treated_MKL1_24h_sig_FC_up,
             layout = "kk",
             color_category = "#c6dbef",
             cex_category = 2,
             showCategory = 8,
             cex_label_category = 1.2,
             cex_label_gene = 1.2,
             categortSize = "pvalue")+
  scale_color_gradient2(name='fold change', midpoint = 4,  high='#de2d26', mid='#fc9272', low='#fff5f0')
p_MKL1_24h_up<- p_MKL1_24h_up + theme(strip.text.x = element_text(size = 22, face = "bold"),
                text = element_text(size = 22, face = "bold"))

p_MKL1_24h_up

#For WaGa 24h ================
pyr_treated_WaGa_24h_sig_DEGs_up<-pyr_treated_sig_DEGs[pyr_treated_sig_DEGs$group == "up" & pyr_treated_sig_DEGs$Cell_line == "WaGa" &   pyr_treated_sig_DEGs$time == "24h",]
pyr_treated_WaGa_24h_sig_FC_up<-pyr_treated_sig_DEGs[pyr_treated_sig_DEGs$group == "up" & pyr_treated_sig_DEGs$Cell_line == "WaGa" & 
                                                       pyr_treated_sig_DEGs$time == "24h","logFC"]
names(pyr_treated_WaGa_24h_sig_FC_up)<-pyr_treated_WaGa_24h_sig_DEGs_up$entrezgene_id

pyr_kegg_WaGa_24h_result_up<-enrichKEGG(pyr_treated_WaGa_24h_sig_DEGs_up$entrezgene_id,
                                pAdjustMethod = "BH",
                                pvalueCutoff  = 0.05,
                                qvalueCutoff  = 0.25
)

#print(head(pyr_kegg_WaGa_24h_result_up@result))

edox_up_WaGa_24h<-setReadable(pyr_kegg_WaGa_24h_result_up, "org.Hs.eg.db", "ENTREZID")

p_WaGa_24h_up<-enrichplot::cnetplot(edox_up_WaGa_24h,
             foldChange = pyr_treated_WaGa_24h_sig_FC_up,
             layout = "kk",
             color_category = "#c6dbef",
             cex_category = 2,
             showCategory = 8,
             cex_label_category = 1.2,
             cex_label_gene = 1.2,
             categortSize = "pvalue")+
  scale_color_gradient2(name='fold change', midpoint = 4,  high='#de2d26', mid='#fc9272', low='#fff5f0')
p_WaGa_24h_up<- p_WaGa_24h_up + theme(strip.text.x = element_text(size = 22, face = "bold"),
                text = element_text(size = 22, face = "bold"))

p_WaGa_24h_up

5.2 KEGG pathway analysis of DOWN regulated genes by pyrvinium treatment (24 hours)

#For MKL1 24h ==================
pyr_treated_MKL1_24h_sig_DEGs_down<-pyr_treated_sig_DEGs[pyr_treated_sig_DEGs$group == "down" & pyr_treated_sig_DEGs$Cell_line == "MKL1" &   pyr_treated_sig_DEGs$time == "24h",]
pyr_treated_MKL1_24h_sig_FC_down<-pyr_treated_sig_DEGs[pyr_treated_sig_DEGs$group == "down" & pyr_treated_sig_DEGs$Cell_line == "MKL1" & 
                                                       pyr_treated_sig_DEGs$time == "24h","logFC"]
names(pyr_treated_MKL1_24h_sig_FC_down)<-pyr_treated_MKL1_24h_sig_DEGs_down$entrezgene_id

pyr_kegg_MKL1_24h_result_down<-enrichKEGG(pyr_treated_MKL1_24h_sig_DEGs_down$entrezgene_id,
                                pAdjustMethod = "BH",
                                pvalueCutoff  = 0.05,
                                qvalueCutoff  = 0.25
)
#print(head(pyr_kegg_MKL1_24h_result_down@result))

edox_down_MKL1_24h<-setReadable(pyr_kegg_MKL1_24h_result_down, "org.Hs.eg.db", "ENTREZID")

p_MKL1_24h_down<-enrichplot::cnetplot(edox_down_MKL1_24h,
             foldChange = pyr_treated_MKL1_24h_sig_FC_down,
             layout = "kk",
             color_category = "#edf8e9",
             cex_category = 2,
             showCategory = 8,
             cex_label_category = 1.2,
             cex_label_gene = 1.2,
             categortSize = "pvalue")+
  scale_color_gradient2(name='fold change', midpoint = -4, high='#deebf7', mid='#9ecae1', low='#08519c')
p_MKL1_24h_down<- p_MKL1_24h_down + theme(strip.text.x = element_text(size = 22, face = "bold"),
                text = element_text(size = 22, face = "bold"))

p_MKL1_24h_down

#For WaGa 24h ================
pyr_treated_WaGa_24h_sig_DEGs_down<-pyr_treated_sig_DEGs[pyr_treated_sig_DEGs$group == "down" & pyr_treated_sig_DEGs$Cell_line == "WaGa" &   pyr_treated_sig_DEGs$time == "24h",]
pyr_treated_WaGa_24h_sig_FC_down<-pyr_treated_sig_DEGs[pyr_treated_sig_DEGs$group == "down" & pyr_treated_sig_DEGs$Cell_line == "WaGa" & 
                                                       pyr_treated_sig_DEGs$time == "24h","logFC"]
names(pyr_treated_WaGa_24h_sig_FC_down)<-pyr_treated_WaGa_24h_sig_DEGs_down$entrezgene_id

pyr_kegg_WaGa_24h_result_down<-enrichKEGG(pyr_treated_WaGa_24h_sig_DEGs_down$entrezgene_id,
                                pAdjustMethod = "BH",
                                pvalueCutoff  = 0.05,
                                qvalueCutoff  = 0.25
)
#print(head(pyr_kegg_WaGa_24h_result_down@result))

edox_down_WaGa_24h<-setReadable(pyr_kegg_WaGa_24h_result_down, "org.Hs.eg.db", "ENTREZID")

p_WaGa_24h_down<-enrichplot::cnetplot(edox_down_WaGa_24h,
             foldChange = pyr_treated_WaGa_24h_sig_FC_down,
             layout = "kk",
             color_category = "#edf8e9",
             cex_category = 2,
             showCategory = 8,
             cex_label_category = 1.2,
             cex_label_gene = 1.2,
             categortSize = "pvalue")+
  scale_color_gradient2(name='fold change', midpoint = -3,  high='#deebf7', mid='#9ecae1', low='#08519c')
p_WaGa_24h_down<- p_WaGa_24h_down + theme(strip.text.x = element_text(size = 22, face = "bold"),
                text = element_text(size = 22, face = "bold"))

p_WaGa_24h_down

5.3 Visualizing the KEGG pathway genes with cnetplot for both cell lines

pyr_kegg_result<-compareCluster(entrezgene_id~Cell_line+group,
                                data = pyr_treated_sig_DEGs,
                                fun = enrichKEGG,
                                pAdjustMethod = "BH",
                                pvalueCutoff  = 0.05,
                                qvalueCutoff  = 0.25
)

datatable(data = pyr_kegg_result@compareClusterResult)
edox<-setReadable(pyr_kegg_result, "org.Hs.eg.db", "ENTREZID")

cnt_enrichment <- enrichplot::cnetplot(edox,
                                       cex_gene = 1.5,
                                       cex_label_gene = 1.2,
                                       cex_label_category = 2,
                                       colorEdge = T,
                                       )+
  theme(legend.text=element_text(size=10, face = "bold"),
        legend.title = element_text(size=12, face = "bold"),
        text = element_text(face = "bold", color = "black"))+
  scale_fill_manual(values=c("MKL1.down"="#46B8DA", "MKL1.up"="#D43F3A", "WaGa.down"="#357EBD","WaGa.up" = "#EEA236"))+
  scale_colour_manual(values=c("MKL1.down"="#46B8DA", "MKL1.up"="#D43F3A", "WaGa.down"="#357EBD", "WaGa.up" = "#EEA236"), 
                      labels=c("MKL1.down", "MKL1.up", "WaGa.down", "WaGa.up"))

cnt_enrichment

6 Running viper to predict TFs activity on pyrvinium treated MCC RNAseq data and performing two samples ttest between conditions.

prior = dorothea_hs[dorothea_hs$confidence %in% c("A","B","C"),]
regulon = df2regulon(prior)
#WaGa cell treated with DMSO and pyrvinium 
expr = readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_waga_realigned_normalized_edat.RDS")
eset = expr[,-c(1:3)]
vpres = viper(eset = eset, regulon = regulon, 
              minsize = 10, nes = T, method = 'none', 
              eset.filter = T, pleiotropy = F, verbose = F)

res = rowTtest(x = vpres[,7:12],y = vpres[,1:6])
padj = p.adjust(res$p.value,method = "BH")
names(padj) = rownames(res$p.value)

WaGa_pyr_vs_DMSO_vpres<-data.frame(TFs = rownames(res$statistic),`t_statistics` = res$statistic, `p.adjust` = padj, sign = sign(res$statistic))
WaGa_pyr_vs_DMSO_vpres[WaGa_pyr_vs_DMSO_vpres$sign == "-1","sign"]<-"down"
WaGa_pyr_vs_DMSO_vpres[WaGa_pyr_vs_DMSO_vpres$sign == "1","sign"]<-"up"

df<-na.omit(WaGa_pyr_vs_DMSO_vpres)
cols<-c("blue","firebrick")

  p1 = ggplot(data = df, 
              aes(x=reorder(TFs, log10(p.adjust)), y=-log10(p.adjust), color = sign, size = abs(t_statistics)*2),
              label = gene)+
    geom_point()+
    scale_color_manual(values = cols)+
    geom_text_repel(data = df, aes(label = TFs), size = sqrt(abs(df$t_statistics))*3, fontface = "bold", colour = ifelse(df$sign == "up", 'firebrick', 'blue'), force = 3, max.overlaps = 50) +
    ylab(paste0("-log10(p.adjust)"))+
    xlab("Transcription Factors")+
    scale_x_discrete(expand = c(0.15,0))+
    lims(y = c(0, 5.5))+
    ggtitle("Transcritpion factors activity -  Pyrvinium vs. DMSO in WaGa cells ")+
    theme(axis.text.x = element_blank(),
          legend.position="none",
          plot.title = element_text(size = 30, face = "bold"),
          panel.background = element_blank(),
          axis.line.y = element_line(),
          #plot.margin = unit(c(1,1,1,1), "cm"),
          axis.title.x = element_text(size=30, face="bold"),
          axis.title.y = element_text(size=30, face="bold"),
          axis.text.y = element_text(size=30, face="bold"))
  
  
#MKL1 cell treated with DMSO and pyrvinium 
expr = readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/pyr_treated_mcc/Pyr_treatment_MKL1_realigned_normalized_edat.RDS")
eset = expr[,-c(1:3)]
vpres = viper(eset = eset, regulon = regulon, 
              minsize = 10, nes = T, method = 'none', 
              eset.filter = T, pleiotropy = F, verbose = F)

res = rowTtest(x = vpres[,7:12],y = vpres[,1:6])
padj = p.adjust(res$p.value,method = "BH")
names(padj) = rownames(res$p.value)

MKL1_pyr_vs_DMSO_vpres<-data.frame(TFs = rownames(res$statistic),`t_statistics` = res$statistic, `p.adjust` = padj, sign = sign(res$statistic))

MKL1_pyr_vs_DMSO_vpres[MKL1_pyr_vs_DMSO_vpres$sign == "-1","sign"]<-"down"
MKL1_pyr_vs_DMSO_vpres[MKL1_pyr_vs_DMSO_vpres$sign == "1","sign"]<-"up"

df<-na.omit(MKL1_pyr_vs_DMSO_vpres)
cols<-c("blue","firebrick")

  p2 = ggplot(data = df, 
              aes(x=reorder(TFs, log10(p.adjust)), y=-log10(p.adjust), color = sign, size = abs(t_statistics)*2),
              label = gene)+
    geom_point()+
    scale_color_manual(values = cols)+
    geom_text_repel(data = df, aes(label = TFs), size = sqrt(abs(df$t_statistics))*3, fontface = "bold", colour = ifelse(df$sign == "up", 'firebrick', 'blue'), force = 3, max.overlaps = 50) +
    ylab(paste0("-log10(p.adjust)"))+
    xlab("Transcription Factors")+
    scale_x_discrete(expand = c(0.15,0))+
    lims(y = c(0, 3))+
    ggtitle("Transcritpion factors activity -  Pyrvinium vs. DMSO in MKL1 cells ")+
    theme(axis.text.x = element_blank(),
          legend.position="none",
          plot.title = element_text(size = 30, face = "bold"),
          panel.background = element_blank(),
          axis.line.y = element_line(),
          #plot.margin = unit(c(1,1,1,1), "cm"),
          axis.title.x = element_text(size=30, face="bold"),
          axis.title.y = element_text(size=30, face="bold"),
          axis.text.y = element_text(size=30, face="bold"))

Viper_TF_activity_plot<-ggarrange(p1, p2,
              ncol = 2,
              nrow = 1
) 
Viper_TF_activity_plot